Introduction

Against the backdrop of recent (pre-Covid-19) economic growth in the City of Buffalo, this project examines changes in single-family housing prices from 2017 to 2020. Buffalo has experienced rising real estate values in response to economic development projects, university expansions, and workforce development programs. The project compares changes in housing prices across neighborhoods and investigates factors that may influence those prices, such as the number of bathrooms, the number of bedrooms, and total living area.

Materials and methods

Sources: Open Data Buffalo, Tax Assessment Data 2017 - 2020; Open Data Buffalo, Neighborhood Boundary Shapefile

Packages

library(tidyverse)
library(ggplot2)
library(ggmap)
library(maptools)
library(ggthemes)
library(rgeos)
library(broom)
library(plyr)
library(dplyr)
library(grid)
library(gridExtra)
library(reshape2)
library(scales)
library(sp)
library(sf)
library(rgdal)
library(RColorBrewer)
library(kableExtra)
library(leaflet)
knitr::opts_chunk$set(cache=TRUE) 

Data Download & Cleaning

# Property class codes kept as single-family housing in this analysis.
# Defined once and shared by both assessment rolls so the two subsets are
# always filtered by the same set of codes.
SingleFam_propclass <- c("210", "215", "240", "241", "250", "270")

# 2017 - 2018 Buffalo Assessment Roll
Parcel17 <- read.csv(file = "https://raw.githubusercontent.com/geo511-2020/geo511-2020-project-erikwoyc/master/2017-2018_Assessment_Roll.csv")
Buffalo_17 <- dplyr::filter(Parcel17, PROPERTY.CLASS %in% SingleFam_propclass)

# 2019 - 2020 Buffalo Assessment Roll
Parcel20 <- read.csv(file = "https://raw.githubusercontent.com/geo511-2020/geo511-2020-project-erikwoyc/master/2019-2020_Assessment_Roll.csv")
Buffalo_20 <- dplyr::filter(Parcel20, PROPERTY.CLASS %in% SingleFam_propclass)

# Neighborhood boundaries (requested as a GeoJSON export), read as an sf object
# and also converted to an sp object for the code that needs sp slots.
Neighborhood_URL <- "https://data.buffalony.gov/api/geospatial/q9bk-zu3p?method=export&format=GeoJSON"
Buffalo_Neighborhoods <- st_read(dsn = Neighborhood_URL)
Buffalo_sp <- as_Spatial(Buffalo_Neighborhoods)

Snapshot of Assessment Data from 2017 - 2018

# Preview table: the first ten parcels of the unfiltered 2017 - 2018 roll,
# restricted to the columns used later in the analysis.
head(Parcel17, 10) %>%
  select(
    PROPERTY.CLASS,
    PROP.CLASS.DESCRIPTION,
    TOTAL.VALUE,
    TOTAL.LIVING.AREA,
    NEIGHBORHOOD,
    LOCATION
  ) %>%
  kable(align = "c", digits = 2) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  )
PROPERTY.CLASS PROP.CLASS.DESCRIPTION TOTAL.VALUE TOTAL.LIVING.AREA NEIGHBORHOOD LOCATION
710 MANUFACTURING & PROCESSING 825000 0
350 URBAN RENEWAL VACANT LAND 6900 0
841 MOTOR VEHICLE 22440000 0 Parkside (42.93973696004812, -78.83864068992023)
311 RESIDENTIAL VACANT LAND 400 0
220 TWO FAMILY DWELLING 54500 2302 Elmwood Bidwell (42.91848032397568, -78.86218043101054)
482 DOWNTOWN ROW TYPE (DETACHED) 275000 0 Elmwood Bidwell (42.9194355694349, -78.87709280994117)
482 DOWNTOWN ROW TYPE (DETACHED) 34500 0 Elmwood Bidwell (42.920238159038476, -78.85944723608432)
210 ONE FAMILY DWELLING 210000 2106 Elmwood Bidwell (42.918710564992416, -78.87441389506147)
482 DOWNTOWN ROW TYPE (DETACHED) 407500 0 Elmwood Bidwell (42.91772431790134, -78.87710775061227)
220 TWO FAMILY DWELLING 110000 2076 Elmwood Bidwell (42.9198347309033, -78.87709262249685)

Data Exploration

# 2017 - 2018 Single Family Housing Price Histogram
# The bar color is set directly on the geom: no `fill` aesthetic is mapped, so
# a fill scale would have nothing to act on. `bins = 30` is ggplot2's default,
# stated explicitly so the bin count is a visible choice.
Plot_2017 <- ggplot(data = Buffalo_17, mapping = aes(x = TOTAL.VALUE)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(x = "Total Value($)", y = "Count",
       title = "Distribution of Buffalo Home Prices",
       subtitle = "Single Family Property Prices (2017 - 2018)",
       caption = "Source: Buffalo Open Data")
print(Plot_2017)

# 2019 - 2020 Single Family Housing Price Histogram
# The bar color is set directly on the geom: no `fill` aesthetic is mapped, so
# a fill scale would have nothing to act on. `bins = 30` is ggplot2's default,
# stated explicitly so the bin count is a visible choice.
Plot_2019 <- ggplot(data = Buffalo_20, mapping = aes(x = TOTAL.VALUE)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(x = "Total Value($)", y = "Count",
       title = "Distribution of Buffalo Home Prices",
       subtitle = "Single Family Property Prices (2019 - 2020)",
       caption = "Source: Buffalo Open Data")
print(Plot_2019)

Buffalo Base Map

# Buffalo bounding box, taken from the `bbox` slot of the sp neighborhood object
Buffalo_bbox <- slot(Buffalo_sp, "bbox")

# Fetch the "toner-lite" basemap tiles covering that box at zoom level 13
basemap <- get_stamenmap(Buffalo_bbox, maptype = "toner-lite", zoom = 13)

2017 - 2018 Assessment Roll Plot

# Static map of the 2017 - 2018 single-family parcels drawn over the basemap.
# Point color encodes TOTAL.VALUE on a log-transformed green gradient.
SingleFam17 <- ggmap(basemap) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, color = TOTAL.VALUE),
    data = Buffalo_17,
    alpha = 0.7,
    size = 0.025
  ) +
  labs(
    title = "Distribution of Buffalo Home Prices",
    subtitle = "Property Prices (2017 - 2018)",
    caption = "Open Data Buffalo"
  ) +
  scale_color_gradient(
    name = "Single Family Home Price",
    low = "light green",
    high = "dark green",
    trans = "log",
    labels = scales::dollar_format(prefix = "$")
  )
print(SingleFam17)

2019 - 2020 Assessment Roll Plot

# Static map of the 2019 - 2020 single-family parcels drawn over the basemap.
# Point color encodes TOTAL.VALUE on a log-transformed green gradient.
SingleFam20 <- ggmap(basemap) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, color = TOTAL.VALUE),
    data = Buffalo_20,
    alpha = 0.7,
    size = 0.025
  ) +
  labs(
    title = "Distribution of Buffalo Home Prices",
    subtitle = "Property Prices (2019 - 2020)",
    caption = "Open Data Buffalo"
  ) +
  scale_color_gradient(
    name = "Single Family Home Price",
    low = "light green",
    high = "dark green",
    trans = "log",
    labels = scales::dollar_format(prefix = "$")
  )
print(SingleFam20)

Interactive Map

Single Family Home Prices 2017 - 2020

# Color palette
# One viridis scale over log(home value) with a fixed domain spanning both
# assessment years, so a given color stands for the same value in either layer
# and in the legend. Non-finite logs (missing or zero values) are left out of
# the domain.
log_values <- log(c(Buffalo_17$TOTAL.VALUE, Buffalo_20$TOTAL.VALUE))
log_values <- log_values[is.finite(log_values)]
pallete <- colorNumeric("viridis", domain = range(log_values))

# Layer names. The same strings are used for each layer's `group` and for the
# layer control's `overlayGroups`; the control only toggles layers whose group
# name matches exactly.
year_groups <- c("2017 - 2018", "2019 - 2020")

Neighborhood_map <- leaflet() %>%
  setMaxBounds(lng1 = -78.91246, lat1 = 42.82603, lng2 = -78.79504, lat2 = 42.96641) %>%
  addProviderTiles("CartoDB") %>%
  addProviderTiles("Stamen.TonerLines",
                   options = providerTileOptions(opacity = 0.35)) %>%
  addCircles(data = Buffalo_17, lng = ~LONGITUDE, lat = ~LATITUDE,
             color = ~pallete(log(TOTAL.VALUE)),
             radius = .05, opacity = 0.5,
             group = year_groups[1]) %>%
  addCircles(data = Buffalo_20, lng = ~LONGITUDE, lat = ~LATITUDE,
             color = ~pallete(log(TOTAL.VALUE)),
             radius = .05, opacity = 0.5,
             group = year_groups[2]) %>%
  addPolygons(data = Buffalo_sp, fillColor = "transparent", color = "#444444", weight = 2) %>%
  addLayersControl(overlayGroups = year_groups) %>%
  # The legend is built on the same log scale as the circles; labels are
  # converted back to dollars so they read as home values.
  addLegend(position = "bottomleft", pal = pallete, values = log_values,
            labFormat = labelFormat(prefix = "$",
                                    transform = function(x) signif(exp(x), 2)),
            title = "Single Family Home Value")
Neighborhood_map

Further Data Exploration

Distribution of Single Family Homes by Year Built

# Histogram of construction year for the 2019 - 2020 single-family subset.
# The bar color is set directly on the geom: no `fill` aesthetic is mapped, so
# a fill scale would have nothing to act on. `bins = 30` is ggplot2's default,
# stated explicitly so the bin count is a visible choice.
Year_built <- ggplot(data = Buffalo_20, mapping = aes(x = YEAR.BUILT)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(x = "Year Built", y = "Number of Homes",
       title = "Distribution of Single Family Homes by Year Built",
       caption = "Source: Buffalo Open Data")
Year_built

Price by Living Area 2019 - 2020

# Scatter plot of total value against total living area, 2019 - 2020 roll
live_price20 <- ggplot(Buffalo_20, aes(TOTAL.LIVING.AREA, TOTAL.VALUE)) +
  geom_point() +
  labs(
    title = "Price by Square ft of Living Space",
    x = "Total Living Area (sqft)",
    y = "Total Value Single Family Home"
  )
print(live_price20)

Price by Bedrooms 2019 - 2020

# Price by Bedrooms 2019 - 2020
# Each bar is the median TOTAL.VALUE of the homes with that bedroom count.
# A plain geom_col() would stack every home's value into the bar, so bar height
# would be the summed value of all such homes and mostly track how many homes
# there are rather than what they are worth.
bed_price <- ggplot(data = Buffalo_20, aes(x = X..OF.BEDS, y = TOTAL.VALUE)) +
  stat_summary(fun = median, geom = "col", na.rm = TRUE) +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(x = "Number of Bedrooms", y = "Median Total Value Single Family Home",
       title = "Price by Number of Bedrooms")
bed_price

Median Price by Neighborhood

# Median single-family value per neighborhood for each assessment roll.
# plyr:: is spelled out because plyr and dplyr are both attached and both
# export summarise(). na.rm = TRUE keeps one missing value from turning a
# neighborhood's median into NA.
data_20 <- plyr::ddply(Buffalo_20, "NEIGHBORHOOD", plyr::summarise,
                       medianPrice = median(TOTAL.VALUE, na.rm = TRUE))
data_17 <- plyr::ddply(Buffalo_17, "NEIGHBORHOOD", plyr::summarise,
                       medianPrice = median(TOTAL.VALUE, na.rm = TRUE))

# After the join, medianPrice.x is the 2017 - 2018 median (left table) and
# medianPrice.y is the 2019 - 2020 median (right table).
data.neighborhoods <- dplyr::left_join(data_17, data_20, by = "NEIGHBORHOOD")
colnames(data.neighborhoods)[1] <- "nhbdname"

# Drops rows 1 and 34 by position.
# NOTE(review): presumably these are the blank/unassigned neighborhood entries;
# confirm, and consider filtering by name, since positions shift if the set of
# neighborhoods changes.
data.neigh <- data.neighborhoods[-c(1, 34), ]

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(data.neigh)
}

# Bar chart of the 2019 - 2020 median (medianPrice.y) per neighborhood
Median_Price <- ggplot(data = data.neigh, mapping = aes(x = nhbdname, y = medianPrice.y)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Neighborhood", y = "Price ($)", title = "Median Price Single Family Home",
       subtitle = "Median Price by Neighborhood (2019 - 2020)",
       caption = "Source: Buffalo Open Data") +
  scale_y_continuous(labels = scales::dollar_format())
print(Median_Price)

Regression Analysis

## Transform Data for Regression
# The response is log10 of total value, stored as a new column named `log`.
Buffalo_20$log <- log10(Buffalo_20$TOTAL.VALUE)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Buffalo_20)
}

## Multiple Linear Regression
# NOTE(review): PROPERTY.CLASS is a class code and BASEMENT.TYPE is presumably
# a category code, yet both enter the model as numeric predictors (one slope
# each). Consider wrapping them in factor() so each level gets its own
# coefficient; confirm the intended coding first, since doing so changes every
# estimate in the summary.
MLR <- lm(log ~ X..OF.BATHS + X..OF.BEDS + YEAR.BUILT + TOTAL.LIVING.AREA +
            PROPERTY.CLASS + BASEMENT.TYPE,
          data = Buffalo_20)
summary(MLR)
## 
## Call:
## lm(formula = log ~ X..OF.BATHS + X..OF.BEDS + +YEAR.BUILT + TOTAL.LIVING.AREA + 
##     PROPERTY.CLASS + BASEMENT.TYPE, data = Buffalo_20)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.34156 -0.12204  0.00818  0.15185  1.12611 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -8.373e+00  4.388e+00  -1.908   0.0564 .  
## X..OF.BATHS        7.396e-02  3.214e-03  23.008   <2e-16 ***
## X..OF.BEDS        -5.799e-02  1.682e-03 -34.470   <2e-16 ***
## YEAR.BUILT         3.541e-03  4.767e-05  74.287   <2e-16 ***
## TOTAL.LIVING.AREA  3.244e-04  2.837e-06 114.327   <2e-16 ***
## PROPERTY.CLASS     2.651e-02  2.089e-02   1.269   0.2045    
## BASEMENT.TYPE      8.841e-02  1.587e-03  55.724   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2335 on 37795 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.5164, Adjusted R-squared:  0.5163 
## F-statistic:  6727 on 6 and 37795 DF,  p-value: < 2.2e-16

Conclusions

References

All sources are cited in a consistent manner